import urllib.request
import codecs
import re

# Download the profile page whose question links will be crawled.
# Put the link to the desired personal home page here.
with urllib.request.urlopen("https://www.zhihu.com/people/excited-vczh") as html:
	# The context manager closes the HTTP response even if read() fails.
	byte = html.read()
strg = byte.decode('utf8')

# Keep a local copy of the profile page; `with` guarantees the file is
# closed even when the write raises.
with codecs.open('vczh.html', 'w', 'utf8') as file:
	file.write(strg)

# Collect the href of every question link on the page. The pattern is a raw
# string so that `\s` / `\S` reach the regex engine without relying on
# Python's deprecated handling of invalid string escapes.
regex = re.findall(r'class="question_link"\s.*\s*href="(/question/\S+)"', strg)
# De-duplicate: the same question may be linked several times.
reg = set(regex)
# Patterns are compiled once, outside the loop. Dots in host names and before
# the file extension are escaped so they only match a literal '.'.
_PICT_RE = re.compile(
	r'data-original=\"(https://pic[0-9]+\.zhimg\.com/\w+\.(?:jpeg|jpg|png))\"')
# Must accept the same hosts as _PICT_RE (one or more digits after "pic");
# otherwise an image URL that was found cannot be turned into a file name.
_PICNAME_RE = re.compile(r'https://pic[0-9]+\.zhimg\.com/(.+)')
_QUESTION_ID_RE = re.compile(r'/question/(\w+)')

# For every question link: download the page, download each distinct image
# it references into the current directory, then save the page itself as
# "<question id>.html".
# NOTE(review): links that share a question id map to the same output file,
# so a later page overwrites an earlier one -- confirm this is intended.
for u in reg:
	url = 'https://www.zhihu.com' + u
	try:
		with urllib.request.urlopen(url) as page_resp:
			strg = page_resp.read().decode('utf8')

		question_id = _QUESTION_ID_RE.search(u)
		if question_id is None:
			# No usable id right after "/question/": no file name to save under.
			print('skipping %s: no question id in link' % url)
			continue
		filename = question_id.group(1) + '.html'
		print(filename)

		for n in set(_PICT_RE.findall(strg)):
			picname = _PICNAME_RE.findall(n)
			print(picname)
			try:
				with urllib.request.urlopen(n) as pic_resp:
					strg_pict = pic_resp.read()
				# Binary write; `with` closes the file, which the crawl
				# previously leaked for every image.
				with open(picname[0], 'wb') as pic_fd:
					pic_fd.write(strg_pict)
			except Exception as err:
				# Best effort: one broken image must not prevent the
				# remaining images and the page itself from being saved.
				print('failed to save image %s: %s' % (n, err))

		with codecs.open(filename, 'w', 'utf8') as fd:
			fd.write(strg)
	except Exception as err:
		# The crawl stays best-effort (a failing question is skipped), but
		# the failure is reported, and KeyboardInterrupt/SystemExit are no
		# longer swallowed as they were by a bare `except:`.
		print('failed to save %s: %s' % (url, err))